# Set the working directory to this file's location before running; all paths below are relative to it.

# Divert console output to the Table 2b log file until the closing sink() call.
sink(file = "../../tables/table2b_log.txt")

library(groundhog)
groundhog.library('apsrtable','2019-01-01',tolerate.R.version='4.1.2')

library(plyr)
library(tidyverse)

# Respondent-level data in wide format; string columns stay character.
dat <- read.csv(
  file = '../../data/cleaned/heuristics_cleaned_wide.csv',
  stringsAsFactors = FALSE
)

# 1/0 indicators: respondent scores above 4 on political_party, and the
# MC's one-letter party code is 'R'. Missing inputs stay NA.
participant.is.rep <- dat$political_party > 4
mc.is.rep <- dat$reppartyoneletter == 'R'
dat$participant_republican <- ifelse(participant.is.rep, 1, 0)
dat$MC_republican <- ifelse(mc.is.rep, 1, 0)

# TRUE when the two indicators coincide (NA if either one is NA).
dat$participant_MC_party_match <-
  dat$participant_republican == dat$MC_republican
# Sanity check (disabled):
# summary(lm(mcratingscale ~ participant_MC_party_match, dat))

# Data at the level of votes/SIGs potentially shown. Each row supplies a
# sig_id and a house_vote_id that are used to build column names in dat.
votes <- read.csv('../../data/Ancillary Data/vote_and_group_info_map.csv', stringsAsFactors = FALSE)

# Filled in below with the supportive (1) / not supportive (0) flag of the SIG
# rating that each respondent was actually shown.
dat$sig_rating_supportive_vote_shown <- NA

# Iterate over votes to create new columns in dat for each vote possibly shown.
# seq_len() (rather than 1:nrow) keeps the loop from running when votes is empty.
for (i in seq_len(nrow(votes))) {
  sig_id <- votes$sig_id[i]
  house_vote_id <- votes$house_vote_id[i]

  # Column names for this vote/SIG pair, built once and reused below.
  rating_col <- paste0('reprating', sig_id)
  supportive_col <- paste0('sig_rating_supportive_', sig_id)
  own_view_col <- paste0('ownview', house_vote_id)
  actual_vote_col <- paste0('repactualvote', house_vote_id)
  perceived_vote_col <- paste0('repvoteperc', house_vote_id)

  # If house vote is missing, need to set rating to NA since it would not be
  # used as a treatment. Ok to leave house vote intact if SIG rating is missing.
  dat[is.na(dat[, actual_vote_col]), rating_col] <- NA

  # Did the SIG give a positive or negative rating to the MC?
  # Ratings below 50 are coded 0, all others 1 (NA stays NA).
  dat[, supportive_col] <- ifelse(dat[, rating_col] < 50, 0, 1)

  # thinks_rep_agrees: the respondent's own view equals the vote they
  # perceive the rep to have cast on this issue.
  dat[, paste0('thinks_rep_agrees_', house_vote_id)] <-
    as.numeric(dat[, own_view_col] == dat[, perceived_vote_col])

  # As a control: the respondent's own view equals the rep's actual vote.
  dat[, paste0('rep_actually_agrees_', house_vote_id)] <-
    as.numeric(dat[, own_view_col] == dat[, actual_vote_col])

  # If this was the vote that was shown, load it into the treatment variable.
  this.vote.shown <- which(dat$heur_randomrating_sig_id == sig_id)
  dat$sig_rating_supportive_vote_shown[this.vote.shown] <-
    dat[this.vote.shown, supportive_col]
}

# Control for average values of treatment within version of survey shown (a or b).
# The column is created up front so rows in neither version are explicitly NA.
dat$sig_rating_supportive_avg <- NA_real_

for (version in c('a', 'b')) {
  # which() drops NA comparisons: an NA in a logical index would otherwise
  # abort the sub-assignment below ("NAs are not allowed in subscripted
  # assignments") or produce an undefined column name.
  sig.ids <- votes$sig_id[which(votes$split == version)]
  version.rows <- which(dat$heur_survey_version == version)

  # Average of whether SIG ratings (that could have been shown) were supportive.
  # drop = FALSE keeps a data frame even when only one SIG belongs to this
  # version, which rowMeans() requires.
  dat$sig_rating_supportive_avg[version.rows] <- rowMeans(
    dat[version.rows, paste0('sig_rating_supportive_', sig.ids), drop = FALSE],
    na.rm = TRUE
  )
}



# Positions of the per-issue columns created in the vote loop. Computed once;
# columns added below are appended, so these positions stay valid.
thinks.idx <- which(startsWith(names(dat), 'thinks_rep_agrees_'))
agrees.idx <- which(startsWith(names(dat), 'rep_actually_agrees_'))

# DV: row mean of thinks_rep_agrees_* (ignoring NA), then rescaled by the
# number of non-missing thinks_rep_agrees_* items.
# NOTE(review): after the rescaling the "mean_" column holds a count of
# perceived agreements, not a mean (NaN when no item was answered).
dat$mean_thinks_rep_agrees <- rowMeans(dat[, thinks.idx], na.rm = TRUE)
dat$num_issue_qs <- rowSums(!is.na(dat[, thinks.idx]))
dat$mean_thinks_rep_agrees <- dat$mean_thinks_rep_agrees * dat$num_issue_qs

# Control variable: row mean of rep_actually_agrees_* (ignoring NA), rescaled
# by the same num_issue_qs.
# NOTE(review): num_issue_qs counts non-missing thinks_rep_agrees_* items, not
# rep_actually_agrees_* items -- confirm this is intended.
dat$actually_agrees_w_rep <-
  dat$num_issue_qs * rowMeans(dat[, agrees.idx], na.rm = TRUE)

# Treatment (sig_rating_supportive_vote_shown) = 0 if bad rating shown, 1 if
# good rating shown (here there is no control group; rating always shown).
# Covariates: every column of dat whose name begins with 'control_'. The match
# ignores case, as tidyselect's starts_with() does by default.
controls <- names(dat)[startsWith(tolower(names(dat)), 'control_')]


# Regressions
# All three models share one specification, so the formula is built once.
# reformulate() assembles it from term labels; unlike pasting the controls onto
# a trailing "+", this still yields a valid formula when `controls` is empty.
model.formula <- reformulate(
  termlabels = c(
    "sig_rating_supportive_vote_shown",
    "actually_agrees_w_rep",
    "heur_survey_version",
    "factor(sig_rating_supportive_avg)",
    controls
  ),
  response = "mean_thinks_rep_agrees"
)

# Full sample.
linear.lm <- lm(model.formula, dat)

# Party alignment between respondent and MC: product of two +/-1 codes
# (respondent: political_party >= 4 vs. below; MC: MC_republican 1 vs. 0),
# so 1 = same side, -1 = opposite sides. Respondents at exactly 4 are then
# recoded to 0 and therefore fall into neither subset below.
dat$mc_voter_same_party <- ifelse(dat$political_party >= 4, 1, -1) * ifelse(dat$MC_republican, 1, -1)
dat$mc_voter_same_party[dat$political_party==4] <- 0 # Missing from our preregistration

# Same specification, restricted to respondents coded as sharing the MC's party.
linear.lm.same.party <- lm(model.formula, dat,
                           subset = mc_voter_same_party == 1)

# Same specification, restricted to respondents coded as opposing the MC's party.
linear.lm.diff.party <- lm(model.formula, dat,
                           subset = mc_voter_same_party == -1)


# Table 2b
# The three models are passed in column order: full sample, same party,
# different party. Coefficients whose names match "factor" or "control_" are
# omitted from the printed table (the expressions are evaluated by apsrtable,
# where `coefnames` is defined).
# print() is explicit because top-level values are auto-printed only in
# interactive/Rscript/echo runs; under a plain source() the table would
# otherwise never reach the log opened by sink().
print(
  apsrtable::apsrtable(linear.lm,
                       linear.lm.same.party, linear.lm.diff.party,
                       digits = 3, stars = 'default',
                       omitcoef = list(
                         expression(grep("factor",coefnames)),
                         expression(grep("control_",coefnames))
                       )
  )
)

# Stop diverting output; closes the log opened at the top of the script.
sink()

#Robustness regressions mentioned in text
# No controls
# linear_nc.lm <- lm(
#   as.formula(
#     paste("mean_thinks_rep_agrees ~
#                   sig_rating_supportive_vote_shown +
#                    heur_survey_version +
#                   factor(sig_rating_supportive_avg) ")
#   )
#   , dat)
# summary(linear_nc.lm)

# Full controls, restricted to observations with heur_num_candidates == 1
# linear_one_MC.lm <- lm(
#   as.formula(
#     paste("mean_thinks_rep_agrees ~
#                   sig_rating_supportive_vote_shown +
#                   actually_agrees_w_rep + heur_survey_version +
#                   factor(sig_rating_supportive_avg) + ",
#           paste(controls, collapse= " + "))
#   )
#   ,  dat,subset = heur_num_candidates==1)
# 
# 
# summary(linear_one_MC.lm)